##9/17/2015
##Replication_Code_2
## This script reproduces Fig 2
##-- words per speech over time
##-- number of speeches per MP over time

# Run this script in a fresh R session. It does not wipe the caller's
# workspace; every object it relies on is created below.

# Load the speech summary data. The file must provide a data frame called
# `summary.data`; the columns read by this script are `session`, `mp_code`
# and `word.count`.
# NOTE(review): the rest of the script uses the number of rows per session as
# the number of speeches, i.e. it treats each row as one speech -- confirm
# against the data file.
# The path is relative and is resolved against the current working directory.
loaded.objects <- load("./../data/summary_speech.rdata")
stopifnot(
  "summary.data" %in% loaded.objects,
  all(c("session", "mp_code", "word.count") %in% names(summary.data))
)

# Coerce the word counts to numeric. A factor has to go through character
# first: as.numeric() on a factor returns the internal level codes rather
# than the values printed for each level.
if (is.factor(summary.data$word.count)) {
  summary.data$word.count <- as.character(summary.data$word.count)
}
summary.data$word.count <- as.numeric(summary.data$word.count)

# Session labels in chronological order, written "<parliament>_<session>".
# Built from each parliament's label and the number of sessions it had,
# numbering the sessions 1..k within every parliament.
parliament.labels <- c("1832", "1835", "1837", "1841", "1847", "1852", "1857",
                       "1859", "1865", "1868", "1874", "1880", "1885", "1886",
                       "1892", "1895", "1900", "1906", "1910A")
sessions.per.parliament <- c(2, 3, 4, 7, 5, 5, 3, 7, 3, 5, 7, 6, 1, 7, 4, 7,
                             6, 4, 1)
session.list <- paste0(
  rep(parliament.labels, times = sessions.per.parliament),
  "_",
  sequence(sessions.per.parliament)
)



# Per-session summaries, in the same order as session.list.

# Rows of summary.data belonging to each session (one data frame per session).
rows.by.session <- lapply(session.list, function(session.id) {
  subset(summary.data, summary.data$session == session.id)
})

# Mean number of words per row (speech): total words divided by row count.
# A session with no rows yields NaN (0 / 0).
word.count.per.speech <- vapply(
  rows.by.session,
  function(session.rows) sum(session.rows$word.count) / nrow(session.rows),
  numeric(1)
)

# Mean number of rows (speeches) per MP: tabulate rows by mp_code, then
# average the counts.
# NOTE(review): if mp_code is a factor, table() also counts levels that do not
# occur in the session (as zeros), which lowers this mean -- confirm the
# column type.
speech.count.per.MP <- vapply(
  rows.by.session,
  function(session.rows) mean(table(session.rows$mp_code)),
  numeric(1)
)

# Marker fill colour and symbol for each session, one entry per element of
# session.list (87 sessions), in the same order.
#
# plot() matches bg/pch to points by position, so these vectors must have
# exactly one entry per plotted session. The earlier literal vectors held 174
# entries -- every session's value appeared twice in a row -- so only their
# first 87 entries were used and markers were misaligned from the 10th session
# onwards. The run lengths below are those of the earlier vectors divided by
# two.
# NOTE(review): presumably the two colours distinguish the party in
# government (gold vs lightblue) -- confirm against the paper's figure legend.
#
# Consecutive runs alternate gold, lightblue, gold, ... and cover:
#   gold      9  1832_1 - 1837_4
#   lightblue 6  1841_1 - 1841_6
#   gold      6  1841_7 - 1847_5
#   lightblue 1  1852_1
#   gold      5  1852_2 - 1857_1
#   lightblue 2  1857_2 - 1857_3
#   gold      7  1859_1 - 1859_7
#   lightblue 3  1865_1 - 1865_3
#   gold      5  1868_1 - 1868_5
#   lightblue 7  1874_1 - 1874_7
#   gold      5  1880_1 - 1880_5
#   lightblue 10 1880_6 - 1892_1
#   gold      3  1892_2 - 1892_4
#   lightblue 13 1895_1 - 1900_6
#   gold      5  1906_1 - 1910A_1
gov.run.lengths <- c(9, 6, 6, 1, 5, 2, 7, 3, 5, 7, 5, 10, 3, 13, 5)

gov.cols <- rep(
  rep_len(c("gold", "lightblue"), length(gov.run.lengths)),
  times = gov.run.lengths
)

# Symbol follows the colour: filled square (22) for gold, filled circle (21)
# for lightblue. Deriving it from gov.cols keeps the two vectors in step.
gov.pch <- ifelse(gov.cols == "gold", 22, 21)

stopifnot(length(gov.cols) == length(gov.pch))


# Summary figure: words per speech as filled markers against the left axis,
# with speeches per MP overlaid as a line against the right axis. Sessions
# are placed at x = 1, 2, ... in session.list order.
par(mfrow = c(1, 1), bg = "cornsilk", mar = c(4, 4, 2, 4))

session.index <- seq_along(session.list)

# bg and pch are matched to points by position, so the marker vectors need
# one entry per session; anything else mislabels the points.
if (length(gov.cols) != length(session.list) ||
    length(gov.pch) != length(session.list)) {
  warning("gov.cols/gov.pch do not have one entry per session; ",
          "markers will not line up with sessions", call. = FALSE)
}

# First layer: markers, with hand-built axes so the x axis can carry the
# session labels (rotated, small font).
plot(session.index, word.count.per.speech,
     bg = gov.cols, col = "black", pch = gov.pch, cex = 1.5,
     axes = FALSE, xlab = "", ylab = "Words/speech")
axis(1, at = session.index, labels = session.list, las = 2, cex.axis = 0.7)
axis(2)
box()

# Second layer: draw on top of the existing plot instead of starting a new
# page; it gets its own y scale, shown on the right-hand axis.
par(new = TRUE)
plot(session.index, speech.count.per.MP,
     type = "l", axes = FALSE, xlab = "", ylab = "", lwd = 2)
axis(4)
mtext("Speeches/MP", side = 4, line = 3)
